Installing Hadoop

First, unpack the Hadoop tarball under /opt:

cd /opt
tar zxvf hadoop-3.3.4.tar.gz
mv hadoop-3.3.4 hadoop
sudo chown -R hadoop:hadoop /opt/hadoop
# Switch to the hadoop user for the remaining steps
su hadoop
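
The commands above assume a hadoop user already exists on the machine; if it does not, it can be created first (as a user with sudo rights), for example:

sudo useradd -m hadoop
sudo passwd hadoop
# grant sudo rights if the later sudo steps will be run as this user
# (the group is "wheel" on RHEL-family systems, "sudo" on Debian-family)
sudo usermod -aG wheel hadoop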

Set up passwordless SSH login for the hadoop user

ssh-keygen
ssh-copy-id 192.168.122.24   # replace with this machine's IP address
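
Confirm that login now works without a password (same placeholder IP as above; the workers file later in this section uses localhost, so it helps to accept that host key once as well):

ssh 192.168.122.24 hostname
ssh localhost hostname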

Add the Hadoop environment variables

# Use tee so the write itself runs with sudo; quote EOF so $HADOOP_HOME stays literal
sudo tee /etc/profile.d/hadoop.sh << 'EOF'
export HADOOP_HOME=/opt/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
EOF

Apply the environment variables: source /etc/profile (run it in the current shell; sudo is not needed)
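
A quick check that hadoop is now on the PATH:

hadoop version
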
Configure the Hadoop runtime environment

# Append (>>) rather than overwrite, so the stock hadoop-env.sh is preserved
cat >> /opt/hadoop/etc/hadoop/hadoop-env.sh << EOF
export HDFS_NAMENODE_USER=hadoop
export HDFS_DATANODE_USER=hadoop
export HDFS_SECONDARYNAMENODE_USER=hadoop
export YARN_RESOURCEMANAGER_USER=hadoop
export YARN_NODEMANAGER_USER=hadoop
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
EOF
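
The JAVA_HOME above assumes OpenJDK 8 installed from the distribution packages (RHEL-family path); confirm the actual path on your machine and adjust if needed, for example:

readlink -f "$(which java)"
ls -d /usr/lib/jvm/java-1.8.0-openjdk*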

Go to the Hadoop configuration directory and edit the files below

cd /opt/hadoop/etc/hadoop
vim core-site.xml

Add the following inside the <configuration> element:

<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.122.24:9000</value><!-- change to your own IP -->
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
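
Once saved, the effective value can be checked without starting anything (assumes HADOOP_HOME is set as above):

hdfs getconf -confKey fs.defaultFS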

Edit hdfs-site.xml and add the following inside the <configuration> element:

<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/hadoop/data/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hadoop/data/datanode</value>
</property>
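
The format and startup steps below normally create these directories, but creating them up front as the hadoop user avoids ownership surprises (paths match the values above):

mkdir -p /opt/hadoop/data/namenode /opt/hadoop/data/datanode /opt/hadoop/tmp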

Edit yarn-site.xml and add the following inside the <configuration> element:

<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- change to your own IP -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>192.168.122.24</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>24576</value> <!-- total memory available to the NodeManager: 24 GB -->
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>6144</value> <!-- maximum memory per container: 6 GB -->
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value> <!-- minimum memory per container: 1 GB -->
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>8</value> <!-- total CPU cores available to this NodeManager: 8 -->
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>8</value> <!-- maximum of 8 vcores per container -->
</property>
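
The memory and vcore values above assume a host with roughly 24 GB of RAM and 8 CPU cores; check what your machine actually has and scale the values accordingly:

free -m   # total RAM in MB; keep yarn.nodemanager.resource.memory-mb below this
nproc     # number of CPU cores, used for the vcores settings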

Hive uses MapReduce by default; if you need to run HiveSQL from DSS, edit mapred-site.xml and add the following inside the <configuration> element:

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
	<name>yarn.app.mapreduce.am.env</name>
	<value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property>
	<name>mapreduce.map.env</name>
	<value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property>
	<name>mapreduce.reduce.env</name>
	<value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property> 
    <name>mapreduce.application.classpath</name>
	<value>/opt/hadoop/share/hadoop/mapreduce/*,/opt/hadoop/share/hadoop/common/*,/opt/hadoop/share/hadoop/common/lib/*,/opt/hadoop/share/hadoop/yarn/*,/opt/hadoop/share/hadoop/yarn/lib/*,/opt/hadoop/share/hadoop/hdfs/*,/opt/hadoop/share/hadoop/hdfs/lib/*</value>
</property>
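
A quick sanity check that the directories listed in mapreduce.application.classpath exist under this install (paths assume the /opt/hadoop layout used throughout):

for d in mapreduce common common/lib yarn yarn/lib hdfs hdfs/lib; do
  ls -d "/opt/hadoop/share/hadoop/$d" > /dev/null 2>&1 || echo "missing: $d"
done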

Configure the worker nodes
Add a workers file (create it if it does not exist; still in /opt/hadoop/etc/hadoop):

cat > workers << EOF
localhost
EOF
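
For this single-node setup, localhost is enough. On a multi-node cluster the workers file would instead list one worker hostname per line, for example (hypothetical hostnames):

cat > workers << EOF
worker1
worker2
EOF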

Initialize HDFS by formatting the NameNode, then start HDFS

hdfs namenode -format
cd /opt/hadoop
sbin/start-dfs.sh
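
Assuming start-dfs.sh came up cleanly, a short check that HDFS is reachable and writable:

hdfs dfsadmin -report            # should list one live DataNode
hdfs dfs -mkdir -p /user/hadoop
hdfs dfs -ls /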

Start YARN

sbin/start-yarn.sh
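
Confirm that the NodeManager has registered with the ResourceManager (it should show one node in RUNNING state):

yarn node -list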

Confirm that all Hadoop services are running:

[hadoop@localhost hadoop]$ jps
14948 DataNode
16231 Jps
15224 SecondaryNameNode
14794 NameNode
15755 NodeManager
15612 ResourceManager
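
The web UIs should be reachable at the Hadoop 3.x default ports: http://192.168.122.24:9870 for the NameNode and http://192.168.122.24:8088 for the ResourceManager (replace with your own IP). As an optional end-to-end smoke test, the bundled example job can be run; the jar name assumes the 3.3.4 release extracted above:

hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar pi 2 4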